%R code for implementing HERRA in GECCO data
%\renewcommand{\baselinestretch}{1.0}
%\setlength{\tabcolsep}{1.0mm}
\scriptsize
\begin{verbatim}
\begin{lstlisting}[language=R]
library("glmnet")
fun.confounders<-function(x,y, pf, a,b,n,lam){
  model<-glmnet(x[a:b,],y[a:b],type.gaussian="naive",lambda=lam,penalty.factor=pf)
  coef<-predict(model,type="coefficients")
  n.conf = sum(pf==0)   ### num of confounders 
  nonzerocoef<-c(seq(1:(n.conf)), which(coef[-(1:(n.conf+1))]!= 0)+n.conf)
  nh = round(n/2)
  a2<-ifelse(a==1,nh+1,1)
  b2<-ifelse(b==nh,n,nh) 
  output1<-lm(y[a2:b2]~x[a2:b2,nonzerocoef])
  print(length(nonzerocoef))
  return(summary(output1)$sigma^2)
}
#### The dataset includes d:disease status; covariates: study,age,sex; 
####  geno: genotype matrix from the top SNPs
cc = data.matrix(cbind(study,age,sex))
#### Step 1: ITRRS, iterative ridge for dimension reduction
lambda.seq = c(0.006, 0.008, 0.01, 0.0102, 0.0104)
res = matrix(0,length(lambda.seq),5)
for (lambdai in 1:5){
  xdat.all=NULL
  for (i in 1:22){ #ITRRS for each chromosome
    filename = paste("...",i,".geno",sep="") #data of chromosome i
    xdat = read.table(filename, header=T)
    xdat2 = data.matrix(xdat)
    rm(xdat)
    for (itrrs in 1:5) {
      pf<-c(rep(0,ncol(cc)),rep(1,ncol(xdat2)))
      xdat3<-cbind(cc,xdat2)
      lam<-lambda.seq[lambdai]
      if (itrrs>3) {
        cv = cv.glmnet(xdat3,d,family="gaussian",nfold=10,alpha=0,penalty.factor=pf)
        lam<-cv$lambda.min
      }
      model<-glmnet(xdat3,d,family="gaussian",type.gaussian="naive",lambda=lam,alpha=0,penalty.factor=pf)
      coef0<-predict(model,type="coefficients")
      coef = abs(coef0[-(1:(ncol(cc)+1)),])
      inout<-ifelse(coef>median(coef),1,0)
      xdat2<-xdat2[,inout==1]
      print(c(lam,median(coef), dim(xdat2)))		
    }
    xdat.all = cbind(xdat.all,xdat2)	
  }
  filename = paste("...",lambda.seq[lambdai],".Rdata",sep="")
  save(xdat.all,file=filename)
  #### END of Step 1
  #### Steps 2-4: lasso and OLS 
  xdat3 = cbind(cc, xdat.all)
  n = nrow(xdat3)
  nh = round(n/2)
  nfolds=10
  pf<-c(rep(0,ncol(cc)),rep(1,ncol(xdat.all)))
  cv <- cv.glmnet(xdat3[1:nh,],d[1:nh], nfolds=nfolds,penalty.factor=pf) 
  lam<-cv$lambda.min
  est.sig.e1ls<-fun.confounders(xdat3,d,pf,1,nh,n,lam)
  cv <- cv.glmnet(xdat3[(nh+1):n,],d[(nh+1):n],nfolds=nfolds,penalty.factor=pf) 
  lam<-cv$lambda.min
  est.sig.e2ls<-fun.confounders(xdat3,d,pf,nh+1,n,n,lam)
  est.sig.els<-(est.sig.e1ls+est.sig.e2ls)/2 
  vary = summary(lm(d~cc))$sigma^2
  h2.ls<-(vary - est.sig.els)/vary
  K<-0.004
  p<-mean(d)
  z<-dnorm(qnorm(1-K))^2
  h2.liab<-h2.ls*(K*(1-K))^2/(p*(1-p))/z
  h2.est = c(h2.ls, h2.liab)		
  res[lambdai,]=c(lambda.seq[lambdai],h2.est)
  print("Results:")
  print(c(lambda.seq[lambdai], h2.est))
}
%\end{lstlisting}
\end{verbatim}

%\clearpage